####Independent Expenditures Data####
library(tidyr)
library(dplyr)
library(ggplot2)
library(stats)
library(stargazer)
library(xtable)
library(foreign)
library(readstata13)
library(xlsx)
library(stringr)
library(devtools)
library(blscrapeR)
library(gdata)
library(openxlsx)
library(zoo)
library(tseries)
library(plm)
library(ecm)
library(tseries)
library(lmtest)
library(car)
install.packages('pglm')
library('pglm')

setwd("~/Documents/Harvard/G_Semester2/GOV 2001/Rep_Articles/PrillamanRep")




####State Per Capita Income and PctChange####
PCAP<-read.csv("BEA19292017_BEAPINCOME.csv", skip=4)

##Drop regional and USA data
INC_PCAP<-slice(PCAP, 2:52)
##Drop DC
INC_PCAP<-filter(INC_PCAP, GeoFips !=11000)


##Convert from wide to long
INC_PCAP<-gather(INC_PCAP, "Year", "inc_pcap", 3:91)

##Convert Years from XXXX-XXXX to XXXX
INC_PCAP<-mutate(INC_PCAP, Year=as.numeric(gsub("X", "", Year)), GeoName=gsub("\\*","", GeoName),
                 inc_pcap=as.numeric(inc_pcap))

##Drop Fips column
INC_PCAP<-select(INC_PCAP, -GeoFips)

##Rename Columns
colnames(INC_PCAP)<-c("State", "Year", "inc_pcap")

##Select Observations after 1960
INC_PCAP<-filter(INC_PCAP, Year > 1959)

##Calculate percentage differnece
INC_PCAP<-INC_PCAP %>%
  group_by(State) %>%
  mutate(lagged = lag(inc_pcap), pctchangeinc_pcap = 100* ((inc_pcap - lagged)/lagged))

##Select Relevant columns
INC_PCAP<-select(INC_PCAP, -lagged)

####State Level Unemployment####
unemp<-read.xlsx("ststdsadata.xlsx", sheet = 1, startRow=7)#Contains 1976-2017
CFR<-read.csv("CampaignFinanceRegs.csv")# contains 1970-1975
CFR$State<-trimws(CFR$State)

##Fix column names
colnames(unemp)<-c("FIPS", "State", "Year", "Month", "CivNonInstPop", "LaborForceTotal", "LaborForcePopPercentage",
                   "TotalEmployment", "TotalEmploymentPct", "UnemploymentTotal", "UnemploymentRate")

##Select March observations for state totals
unemp_pct<-filter(unemp, FIPS !="037" & FIPS !="51000" & Month=="03" & FIPS !="11")


##Clean CFR and bind to 1975
EarlyUnemp<-filter(CFR, Year > 1969 & Year < 1976)
EarlyUnemp<-select(EarlyUnemp, State, Year, CurrentUnempMethodology)
colnames(EarlyUnemp)<-c("State", "Year", "UnemploymentRate")

##Select employment rate and state year columns
unemp_pct<-select(unemp_pct, State, Year, UnemploymentRate)

##Bind EarlyUnemp and current unemp
unemp_pct<-rbind(unemp_pct, EarlyUnemp)

##Arrange for change calculation
unemp_pct<-arrange(unemp_pct, State, Year)
##Calculate year over year change in total employment pct
unemp_pct<-unemp_pct %>%
  group_by(State) %>%
  mutate(UnemploymentChange=c(NA, diff(UnemploymentRate)))
unemp_pct<-mutate(unemp_pct, Year=as.numeric(Year))


####Percent of state’s that is black####


####Dummy variable for tax or expenditure limitation####
##Legislative percentage that is required to pass a tax increase
##Union/Corporation Unlimited Independent Expenditure/Contribution Bans
##Tax and Expenditure Limits 1960-2018
##Data also includes unemployment rates
CFR<-read.csv("CampaignFinanceRegs.csv")

##Drop redundant unemployment information
CFR<-select(CFR, -UnemploymentRateManpower, -CurrentUnempMethodology)

CFR$State<-trimws(CFR$State)

##Fix NAs
CFR[is.na(CFR)]<- 0

##Add .5 for legislative percentage required to pass increase to fill in cells left blank
##during data entry
CFR<-mutate(CFR, SuperMajTaxRate=ifelse(SuperMajTaxRate==0, .5, SuperMajTaxRate))



####Union Density####
STATEUNIONDENSITY<-read.xlsx("State_Union_Membership_Density_1964-2017.xlsx", sheet = 1)
##Drop 1st row and all states
STATEUNIONDENSITY<-STATEUNIONDENSITY[-1,]
##Drop sheet info rows and drop Washington DC entry
STATEUNIONDENSITY<-filter(STATEUNIONDENSITY, is.na(State.ID)==FALSE & State.ID !=53)

##Convert from wide to long format
STATEUNION<-gather(STATEUNIONDENSITY, "Year", "Union_PCT", 3:56)

##Convert column names to years
STATEUNION<-mutate(STATEUNION, Year=as.numeric(gsub("%Mem", "",Year)))
STATEUNION<-mutate(STATEUNION, Year=ifelse(Year >17, Year+1900, Year+2000))

##Select relevant columns
STATEUNION<-select(STATEUNION, State.Name, Year, Union_PCT)

##Select relevant years
STATEUNION<-filter(STATEUNION, Year > 1959)

##Rename
colnames(STATEUNION)<-c("State", "Year", "Union_Pct")


####Southern politics dummy variable####
##Contiguous states



####Competitiveness of state legislative elections####
##1967-2010: (This data is super messy before like 1980 so unless we change our time frame it won't
##be that useful)
election<-read.csv("ICPSRFull.tsv", header=TRUE, stringsAsFactors=TRUE,sep="\t",
                   quote="")

subelection<-select(election, v02, v05, v07, v35, v24, v16, v17, v36, v09)
colnames(subelection)<-c("StateAbb", "Year", "Chamber","VictoryMargin", "ElectionWinner",
                         "ElectionType","StartSessionElection", "VictoryPercentage",
                         "DistrictID")

##Select Nebraska Senate, LA jungle primaries, 
subelection<-filter(subelection, ElectionWinner==1 & ElectionType=="G" & StartSessionElection==1
                    & Chamber==9 | (Chamber==8 & StateAbb=="NE" & ElectionType=="G" & StartSessionElection==1 &
                                      ElectionWinner==1) | (ElectionWinner==1 & (ElectionType=="LAR"|ElectionType=="LAF")
                    & StartSessionElection==1 & Chamber==9 & StateAbb=="LA") )
subelection<-distinct(subelection)
subelection[which(is.na(subelection$VictoryPercentage)),8]<-1

nj<-filter(subelection, StateAbb =="NJ")
x<-as.data.frame(xtabs(~StateAbb+Year, data=subelection))
xlook<-filter(x, Year==1972 | Year ==1973)
x<-arrange(x, StateAbb, Year)
subelection<-mutate(subelection, CompetitiveRace=ifelse(VictoryPercentage <=60, 1, 0))

elect_sum<-group_by(subelection, StateAbb, Year)
elect_sum<-summarize(elect_sum, CompetitivePct=sum(CompetitiveRace)/n())

##V22-Incumbency Dummy

##Get Incumbency Status and Previous Election Republican, Dem, and Incumbent Vote Totals for CT
##Republican vote totals=V30
##Democratic Vote totals = v31
##Highest vote percentage =v33
##Percent margin of victory=v35
##Election winner= v24
##V22-Incumbency Dummy
##V21-Simplified party code
##V20 Detailed party code

##After 2010
SLER<-read.csv("SLER1971-2016.csv")


SLER<-mutate(SLER, TotalVotes=Dem.Votes+GOP.Votes+Other.Votes, DemPct=Dem.Votes/TotalVotes,
             RepPct=GOP.Votes/TotalVotes, OtherPct=Other.Votes/TotalVotes, WinnerPct=pmax(DemPct, RepPct, OtherPct, na.rm = TRUE))
SLER[which(is.na(SLER$WinnerPct)),13 ]<-1
SLER<-mutate(SLER, CompetitiveRace=ifelse(WinnerPct<=.6, 1, 0))


SLER_SUM<-group_by(SLER, State, Year)
SLER_SUM<-summarize(SLER_SUM, CompetitivePct=sum(CompetitiveRace)/n())

SLER_SUM<-arrange(SLER_SUM, State, Year)




####Legislative Professionalism####
state_data<-read.csv("correlatesofstatepolicyprojectv1_11.csv", header=TRUE)
legpro<-select(state_data,  year, state,bowen_legprof_firstdim, bowen_legprof_seconddim)
legpro<-filter(legpro, year >1960 & state !="District of Columbia")

##Add row to bind frames to
outframe<-legpro[1,]

##interpolate occupational categories and legislative professionalism measures, ig_density, union_density
states<-sort(unique(legpro$state))
for(i in 1:length(states)){
  sub_state<-states[i]
  comb<-filter(legpro, state==sub_state)
  for(j in 1:ncol(comb)){
    if(length(which(is.na(comb[,j]))) >0 ){
      comb[,j]<-na.spline(comb[,j])
    }
  }
  outframe<-rbind(outframe, comb)
}

outframe<-outframe[-1,]



####Chamber/Gubernatorial Control####
pc<-read.csv("PartyControl.csv")
sf<-as.data.frame(cbind(state.abb, state.name))

##Select Relevant Columns
pc<-left_join(pc, sf, by=c("abb"="state.abb"))
pc<-select(pc, state.name, year, hs_dem_per_2pty, sen_dem_per_2pty, gov_party)

##Recode Governor party to be more clear
#pc<-mutate(pc, gov_party=ifelse(gov_party==0, "R", "D"))


####Business Taxation as pct of GSP####
tax<-read.xls("STC_Historical_DB (2016).xls")

##Drop information rows
tax<-slice(tax, -1)
##Remove state gov names
tax<-mutate(tax, Name = gsub(" STATE GOVT", "", Name))
##Drop US entries
tax<-filter(tax, State !="00" & State !="0" & Year > 1959)

tax[,5:36] = apply(tax[,5:36], 2, function(x) gsub(",","",x))
tax[,5:36] = apply(tax[,5:36], 2, function(x) as.numeric(as.character(x)))

##Create totals
tax<-mutate(tax, tottaxfull=1000*C105, busntax = 1000*(T01+T22+T28+T41))

##Select relevant tax columns
tax<-select(tax, Year, Name, tottaxfull, busntax)
tax<-arrange(tax, Name, Year)


##Add state names
sf<-as.data.frame(cbind(state.abb, state.name))

##Fix state names
tax<-left_join(tax, sf, by=c("Name"="state.abb"))
tax<-select(tax, -Name)

colnames(tax)<-c("Year", "tottaxfull","busntax", "State")

##Gross State Product Data 1997-2016
gspann<-read.csv("GSP97-16.csv", skip = 4)# In millions of current dollars


##Drop National and regional data 
gspann<-slice(gspann, 2:52)
##Drop DC
gspann<-filter(gspann, Fips != 11000)

##Convert to long format
gspann<-gather(gspann, "Year", "GSP", 3:22)

##Drop preceding X in year variable and convert to numeric
gspann<-mutate(gspann, Year=as.numeric(gsub("X", "", Year)))

##Drop 1997 duplicated in older gsp
gspann<-filter(gspann, Year >1997)
##1967-1997 Taxation Data
gsp6797<-read.csv("GSP67-97.csv", skip=4)#In millions of current dollars

##Drop National and regional data 
gsp6797<-slice(gsp6797, 2:52)
##Drop DC
gsp6797<-filter(gsp6797, Fips != 11000)

##Convert to long format
gsp6797<-gather(gsp6797, "Year", "GSP", 3:37)

##Drop preceding X in year variable and convert to numeric
gsp6797<-mutate(gsp6797, Year=as.numeric(gsub("X", "", Year)))

##merge old and new gsp data
gspall<-bind_rows(gsp6797, gspann)

##Select Relevant columns
gspall<-select(gspall, -Fips)

##Rename gspall 
colnames(gspall)<-c("State", "Year", "GSP")

##Convert to dollars
gspall<-mutate(gspall, GSP=GSP*1000000)

##Bind GSpall to tax 
taxgsp<-left_join(gspall, tax, by=c("State"="State", "Year"="Year"))

##Calculate DV
taxgsp<-mutate(taxgsp, busntaxprop=100*busntax/GSP, totaxprop=100*tottaxfull/GSP)

##Select relevant columns
taxgsp<-select(taxgsp, -tottaxfull,-busntax)

##
print<-select(taxgsp, State, Year)
write.csv(print, "print.csv")


####Corporate Income Tax Revenue per capita changes####

##State Population1900-2008
state_data<-read.csv("correlatesofstatepolicyprojectv1_11.csv", header=TRUE)
statepop<-select(state_data, state,  year, poptotal)

statepop882006<-filter(statepop, year > 1987 & state !="District of Columbia" & year < 2006)

statepop882006<-select(statepop882006, state, year, poptotal)
colnames(statepop882006)<-c("State", "Year", "Population")

##State Population 2010-2016
teens<-read.csv("20102016CensusPopByState.csv")

##Teens
##Select states without PR and DC
teens<-filter(teens, SUMLEV==40 & STATE != 72 & STATE !=11)

##Select population columns
teens<-select(teens, STATE, NAME, 8:15)

##Convert from wide to long format
teens<-gather(teens, "Year", "PopulationEstimate", 3:10)

##Drop POPESTIMATE from year variable
teens<-mutate(teens, Year=as.numeric(gsub("POPESTIMATE", "", Year)))


##State population 2000-2010
aughts<-read.csv("State2000-2010PopByAgeSex.csv")
#Select state level pop estimates minus US and DC
aughts<-filter(aughts, STATE !=0 & STATE !=11 & SEX==0 & AGE==999)

##select pop estimate columns
aughts<-select(aughts, STATE, NAME, 14:17)

##Convert from wide to long format
aughts<-gather(aughts, "Year", "PopulationEstimate", 3:6)

##Drop POPESTIMATE from year variable
aughts<-mutate(aughts, Year=as.numeric(gsub("POPESTIMATE", "", Year)), PopulationEstimate=PopulationEstimate)

##Merge Aughts and Teens
CENSUS_POP<-rbind(aughts, teens)

CENSUS_POP<-select(CENSUS_POP, -STATE)

colnames(CENSUS_POP)<-c("State", "Year", "Population")

##bind recent with old population data
CENSUS_POP<-rbind(CENSUS_POP, statepop882006)



##Read in corporate income and bind to census population
corpincome<-read.csv("CorporateIncomeTax.csv")

colnames(corpincome)
corpincome[is.na(corpincome)]<-0
corpincome<-filter(corpincome, Year > 1987)

corpincome<-left_join(corpincome, CENSUS_POP, by=c("State"="State", "Year"="Year"))

##Convert corporate income to millions
corpincome<-mutate(corpincome, BusinessRevenueChange=1000000*(BusinessTaxesChanges+PersonalIncomeTax+SalesTax+OtherTax))

##Scale Corporate Income Tax Revenue from fiscal year to 2010 dollars
df <- bls_api("CUSR0000SA0")
df <- inflation_adjust(2010)
df$year<-as.numeric(df$year)##Convert BLS scrape to numeric

##Set base year    
baseCPI<-df[[which(df$year==2010), 2]]
## Loop through all years and convert to 2007/base year dollars
for(i in 1:nrow(corpincome)){
  id<-which(df$year==corpincome[[i,2]])
  Obs_CPI<-df[[id,2]]##Find CPI for Year of data observation
  corpincome[i,5]<-(corpincome[[i,5]])*(baseCPI/Obs_CPI)
}

##Calculate per capita business tax revenue change
corpincome<-mutate(corpincome, BusinessTaxChange_pcap=BusinessRevenueChange/Population)

##Join corporate income tax changes
taxgsp<-left_join(taxgsp, corpincome, by=c("State"="State", "Year"="Year"))
taxgsp[is.na(taxgsp)]<-0

cont<-read.csv("StateContinguity.csv")
cont[is.na(cont)]<- 0

##Loop through states
##Grab states which touch that state and store count, grab tax levels
##Average tax levels and input into state year neighbor average tax rate
statenames<-state.name[state.name !="Alaska" & state.name !="Hawaii"]
sn<-colnames(cont)
neighbordata<-data_frame(State=NA, Year=NA, busntaxprop_neighb=NA, totaxfull_neighb=NA,
                         busntaxchangepcap_neighb=NA)
obsyears<-1963:2016
k<-0
for (i in 1:48){
  ##Grab state
  statesub<-statenames[i]
  for(j in 1:54){
    k<-k+1
    #Iterate row counter
    subyear<-obsyears[j]
    ##Grab which states touch ith state
    #Find rows in continguity matrix which match substate, 
    #adjust index to grab names from state.name
    substates<-state.name[which(cont[cont$State==statesub,]==1)-1]
    
    ##Create frame with matching year and state neighbordata
    subframe<-filter(taxgsp, Year==subyear & State %in% substates)
    
    ##Calculate mean and input into dataframe
    neighbordata[k,1]<-statesub
    neighbordata[k,2]<-subyear
    neighbordata[k,3]<-mean(subframe$busntaxprop)
    neighbordata[k,4]<-mean(subframe$totaxprop)
    neighbordata[k,5]<-mean(subframe$BusinessRevenueChange)
  }
  
}
warnings()
##Combine neighbor tax rates to main tax frame
taxgsp<-left_join(taxgsp, neighbordata, by=c("State"="State", "Year"="Year"))


####Southern States####
south<-read.csv("SouthernStates.csv")
####Bind Data together####



##Bind Income Pcap Pct Change to Unemployment
FINAL<-left_join(INC_PCAP, unemp_pct, by=c("State"="State", "Year"="Year"))

##Add campaign finance regs, state legislature tax expenditure limits/supermajority tax requirements
FINAL<-left_join(FINAL, CFR, by=c("State"="State", "Year"="Year"))

##Add union membership levels
FINAL<-left_join(FINAL, STATEUNION, by=c("State"="State", "Year"="Year"))

##Add legislative professionalism
FINAL<-left_join(FINAL, outframe, by=c("State"="state", "Year"="year"))

##Add chamber control
FINAL<-left_join(FINAL, pc, by=c("State"="state.name", "Year"="year"))

##Add tax variables
FINAL<-left_join(FINAL, taxgsp, by=c("State"="State", "Year"="Year"))

##Add south
FINAL<-left_join(FINAL, south, by=c("State"="State"))

FINAL[is.na(FINAL)]<-0
##Save Final
#write.csv(FINAL, "ExtensionFinal.csv")



##ExtensionFinal.csv is on dropbox and has all cleaned variables. Other spreadsheets used above have also
##been added to dropbox.
FINAL<-read.csv("ExtensionFinal.csv", row.names=1)
FINAL<-filter(FINAL, Year > 1971 & Year < 2017 & State !="Nebraska" & State !="Alaska" & State !="Hawaii")




##Check for NAs
which(is.na(FINAL), arr.ind=TRUE)
##Nebraska has no party control data, 
####Variable date ranges
##IncPcap: 1960-2017
##pctchangeinc_pcap 1961-2017
##UnemploymentRate: 1970-2017
##UnemploymentChange: 1971-2017
#CorpIndExpBan1960-2017
##"UnionIndExpBan"  1960-2017       
##"CorporateContBan"   1960-2017 
##"UnionContBan"    1960-2017       
##"SuperMajTaxRate" 1960-2017       
##"SpendingLimit" 1960-2017         
##"RevenueLimit"    1960-2017
##"Union_Pct"     1964-2017      
##"bowen_legprof_firstdim" 1973-2014 (Data starts)
##"bowen_legprof_seconddim" 1973-2014
##"hs_dem_per_2pty"  1960-2017
##"sen_dem_per_2pty"   1960-2017   
##"gov_party"   1960-2017         
##"GSP" 1963-2016              
##"busntaxprop" 1963-2016     
##"totaxprop"  1963-2016     

####PLM Analysis####
fs<-pdata.frame(FINAL, index=c("State", "Year"))##sets up panel data like xtset in stata

##Create model with state and year fixed effects, lagged and differenced values for each independent variable
ecm<-plm(busntaxprop~diff(inc_pcap)+diff(UnemploymentRate)+diff(CorpIndExpBan)+diff(UnionIndExpBan)+
           diff(CorporateContBan)+diff(UnionContBan)+diff(SuperMajTaxRate)+diff(SpendingLimit)
         +diff(RevenueLimit)+diff(Union_Pct)+diff(bowen_legprof_firstdim)+
           diff(bowen_legprof_seconddim)+diff(sen_dem_per_2pty)+diff(gov_party)+
           diff(busntaxprop_neighb)+
           
           lag(inc_pcap)+lag(UnemploymentRate)+lag(CorpIndExpBan)+lag(UnionIndExpBan)+
           lag(CorporateContBan)+lag(UnionContBan)+lag(SuperMajTaxRate)+lag(SpendingLimit)
         +lag(RevenueLimit)+lag(Union_Pct)+lag(bowen_legprof_firstdim)+
           lag(bowen_legprof_seconddim)+lag(sen_dem_per_2pty)+lag(gov_party)+
             lag(busntaxprop_neighb)+lag(CorpIndExpBan*sen_dem_per_2pty)+lag(UnionIndExpBan*sen_dem_per_2pty)+
           lag(CorporateContBan*sen_dem_per_2pty)+lag(UnionContBan*sen_dem_per_2pty),
         data=fs, index = c("State","Year"),method = "within", effect = "twoways")

summary(ecm)
coeftest(ecm, vcov.=vcovBK)##Implement panel corrected standard errors



##ECM model with two lags
ecm<-plm(busntaxprop~diff(inc_pcap)+diff(UnemploymentRate)+diff(CorpIndExpBan)+diff(UnionIndExpBan)+
           diff(CorporateContBan)+diff(UnionContBan)+diff(SuperMajTaxRate)+diff(SpendingLimit)
         +diff(RevenueLimit)+diff(Union_Pct)+diff(bowen_legprof_firstdim)+
           diff(bowen_legprof_seconddim)+diff(hs_dem_per_2pty)+diff(sen_dem_per_2pty)+diff(gov_party)+
           diff(busntaxprop_neighb)+
           ##First lags
           lag(inc_pcap)+lag(UnemploymentRate)+lag(CorpIndExpBan)+lag(UnionIndExpBan)+
           lag(CorporateContBan)+lag(UnionContBan)+lag(SuperMajTaxRate)+lag(SpendingLimit)
         +lag(RevenueLimit)+lag(Union_Pct)+lag(bowen_legprof_firstdim)+
           lag(bowen_legprof_seconddim)+lag(hs_dem_per_2pty)+lag(sen_dem_per_2pty)+lag(gov_party)+
             lag(busntaxprop_neighb)
           ##Second lags
         +lag(inc_pcap,2)+lag(UnemploymentRate,2)+lag(CorpIndExpBan,2)+lag(UnionIndExpBan,2)+
           lag(CorporateContBan,2)+lag(UnionContBan,2)+lag(SuperMajTaxRate,2)+lag(SpendingLimit,2)
         +lag(RevenueLimit,2)+lag(Union_Pct,2)+lag(bowen_legprof_firstdim,2)+
           lag(bowen_legprof_seconddim,2)+lag(hs_dem_per_2pty,2)+lag(sen_dem_per_2pty,2)+lag(gov_party,2)+
             lag(busntaxprop_neighb,2),
         data=fs, index = c("State","Year"), method = "within", effect = "twoways")
summary(ecm)


## National Association of State Budget Offices Annual Reports of Revenue Changes per fiscal year for 
##state personal income taxes, state corporate income taxes, sales taxes, and miscellaneous taxes which usually
##are very niche business taxes. The sum of these forecasted revenue changes is divided by population to yield
##the dependent variable. Data are only available from 1988-2016. 
##Create model with state and year fixed effects, lagged and differenced values for each independent variable

FINAL<-read.csv("ExtensionFinal.csv", row.names=1)
FINAL<-filter(FINAL, Year > 1987 & Year < 2017 & State !="Nebraska" & State !="Alaska" & State !="Hawaii")
FINAL<-mutate(FINAL, BigTaxCut=ifelse(BusinessTaxChange_pcap < -10, 1, 0), BigTaxHike=ifelse(BusinessTaxChange_pcap > 10, 1, 0))
fs<-pdata.frame(FINAL, index=c("State", "Year"))
##Model fit is very poor when modeling tax cuts linearly in error correction model
ecm<-plm(BusinessTaxChange_pcap~diff(inc_pcap)+diff(UnemploymentRate)+diff(CorpIndExpBan)+diff(UnionIndExpBan)+
           diff(CorporateContBan)+diff(UnionContBan)+diff(SuperMajTaxRate)+diff(SpendingLimit)
         +diff(RevenueLimit)+diff(Union_Pct)+diff(bowen_legprof_firstdim)+
           diff(bowen_legprof_seconddim)+diff(hs_dem_per_2pty)+diff(sen_dem_per_2pty)+diff(gov_party)+diff(GSP)+
           diff(busntaxprop_neighb)+
           
           lag(inc_pcap)+lag(UnemploymentRate)+lag(CorpIndExpBan)+lag(UnionIndExpBan)+
           lag(CorporateContBan)+lag(UnionContBan)+lag(SuperMajTaxRate)+lag(SpendingLimit)
         +lag(RevenueLimit)+lag(Union_Pct)+lag(bowen_legprof_firstdim)+
           lag(bowen_legprof_seconddim)+lag(hs_dem_per_2pty)+lag(sen_dem_per_2pty)+lag(gov_party)+lag(GSP)+
           lag(busntaxprop_neighb)+Southern,
         data=fs, index = c("State","Year"), method = "within", effect = "twoways")
##What do changes in dependent variable look like?
fslook<-fs[which(fs$BusinessTaxChange_pcap !=0),]
fslook<-arrange(fslook, BusinessTaxChange_pcap)


##look at distribution of tax revenue per capita changes
plot(density(fs$BusinessTaxChange_pcap))
quantile(fs$BusinessTaxChange_pcap, probs= seq(0, 1, 0.05))##10th percentile chage is $-11 per capita, while 90th is 
##11.73->Perhaps the more relevant number is what is the probability of seeing a big tax cut in a given year




##Modeling changes in tax cut revenue as dependent variable for large tax increase or decrease
##Percent of state reporting union membership is positively related to state passing large tax
##hike, operationalized as more than $10 per capita change in tax revenue of sales, corporate income tax,
##
FINAL<-mutate(FINAL, BigTaxCut=ifelse(BusinessTaxChange_pcap < -10, 1, 0), BigTaxHike=ifelse(BusinessTaxChange_pcap > 10, 1, 0))
fs<-pdata.frame(FINAL, index=c("State", "Year"))
ecm<-pglm(BigTaxHike~inc_pcap+UnemploymentRate+CorpIndExpBan+UnionIndExpBan+
           CorporateContBan+UnionContBan+SuperMajTaxRate+SpendingLimit
         +RevenueLimit+Union_Pct+bowen_legprof_firstdim+
           bowen_legprof_seconddim+hs_dem_per_2pty+sen_dem_per_2pty+gov_party+busntaxprop_neighb+
           Union_Pct*UnionContBan+lag(BigTaxCut,4),
         data=fs, family=binomial(link = "logit"), effect = "individual")
    
summary(ecm)
##Model shows that union membership is positively related to the probability that a state sees a tax cut in a given year
##Interestingly, partisan control variables are not significant suggesting that the degree of union strength is the
##relevant factor in determining tax rates, not the power of the Democratic party per se as the literature on partisan
##effects of taxation suggests.




